import numpy as np
import pandas as pd
import torch
import boto3
from io import StringIO
from skimage import io
from skimage.transform import resize
from torch import nn
from sklearn.model_selection import train_test_split

class VGGNET(nn.Module):
    
    def __init__(self):
        super(VGGNET, self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(64),nn.ReLU())
        self.layer2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(64),nn.ReLU(), nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer3 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(128),nn.ReLU())
        self.layer4 = nn.Sequential(nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(128),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer5 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(256),nn.ReLU())
        self.layer6 = nn.Sequential(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(256),nn.ReLU())
        self.layer7 = nn.Sequential(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(256),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer8 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
        self.layer9 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
        self.layer10 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer11 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
        self.layer12 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
        self.layer13 = nn.Sequential(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer14 = nn.Sequential(nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU())
        self.layer15 = nn.Sequential(nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU())
        self.layer16 = nn.Sequential(nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc = nn.Sequential(nn.Dropout(0.5),nn.Linear(1024 * 3 * 3, 9216),nn.ReLU())
        self.fc1 = nn.Sequential(nn.Dropout(0.5),nn.Linear(9216, 9216),nn.ReLU())
        self.fc2= nn.Sequential(nn.Linear(9216, 1))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)
        out = self.layer14(out)
        out = self.layer15(out)
        out = self.layer16(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

if __name__ == '__main__':

    #________Global Parameters___________________

    BATCH_SIZE = 32
    EPOCH = 40
    LEARNING_RATE = 0.005

    #_____Loading Dataset from S3 Bucket__________

    # Create S3 client
    s3 = boto3.client("s3",aws_access_key_id='xxxxxxxxx',aws_secret_access_key='xxxxxxxxxxx')
    # Store bucket name
    bucket_name = "thyroid-cancer-bucket"
    
    #objects_list = s3.list_objects_v2(Bucket=bucket_name).get("Contents") # Retrive all directories from bucket
    #Dataset_Images = list(map(lambda x: x['Key'] , filter(lambda x: '.jpg' in x['Key'] , objects_list))) # Select only Images
    Labels = pd.read_csv(StringIO(str(s3.get_object(Bucket=bucket_name, Key = 'Labels.csv')['Body'].read(),'utf-8'))) # read labels as csv file
    Dataset_Images = ['Dataset/' + item for item in Labels['fileName'].to_numpy()] # read Images Name

    X = [] # features Vector
    Y = Labels['label'].to_numpy() # Labels
    
    print('#Reading Dataset Images...')
    # Iterate over every object in bucket
    for data in Dataset_Images:
        # Read an object from the bucket
        img = np.frombuffer(s3.get_object(Bucket=bucket_name, Key=data)['Body'].read() , dtype = np.uint8) # convert Buffered string bytes to np image
        img = (resize(img , (224,224)) * 255).astype(np.uint8).reshape((1,224,224))
        X.append(img) # add to x array

    X = np.array(X)

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42) # Split Train and Test
    
    model = VGGNET()
    optimizer = torch.optim.Adam(model.parameters() , LEARNING_RATE) # Adam optimizer
    Loss = nn.BCEWithLogitsLoss() # Binary Cross Entropy Loss function

    for epoch in range(0,EPOCH):
        temp_loss = 0
        step = 0
        temp_accuracy = 0
        for batch in range(0 , len(X_train) , BATCH_SIZE): # train phase
            Data = torch.from_numpy(X_train[batch : batch + BATCH_SIZE]).float()
            labels = torch.from_numpy(y_train[batch : batch + BATCH_SIZE]).float()
            predicted = model(Data).squeeze(1) # get model outputs
            
            loss = Loss(predicted , labels) #calc binary cross entropy loss function
            temp_loss += loss.item() # accumulative loss values

            predicted = (predicted.data > 0.5).float() # check model predicted values
            correct = (predicted == labels).sum().item() #correct Answers
            temp_accuracy += (100 * (correct / len(labels))) # accumulative accuracy value in each batch data
            step += 1

            model.zero_grad() # ignore gradient history
            #loss.requires_grad = True
            loss.backward() # backward like back-propagation
            optimizer.step() # save weights

        print('Training Phase - Epoch # ' , str(epoch+1) , ', Loss : ' , str(temp_loss / step) , ' , Accuracy Value is : ' , str(temp_accuracy / step)+'%')

    #______________Test Phase___________________
                
    with torch.no_grad(): # Stop Updating Weights
        true = 0
        X_test = torch.from_numpy(X_test).float() # test data
        Y_test = torch.from_numpy(y_test).float() # test labels
        outputs = model(X_test).squeeze(1) # Test with 20% of Data
        predicted = (outputs.data > 0.5).float() # Predicted Labels
        del outputs # free Ram Space
        true = (predicted == Y_test).sum().item() # correct Answers
        print('The Model Accuracy Is : ' , str(100 * (true / len(Y_test))) + '%') # Print Final Accuracy

#Reading Dataset Images...
Training Phase - Epoch #  0 , Loss :  160.028329372406  , Accuracy Value is :  59.10326086956522%
Training Phase - Epoch #  1 , Loss :  8.938586235046387  , Accuracy Value is :  70.27475845410628%
Training Phase - Epoch #  2 , Loss :  2.282831016514036  , Accuracy Value is :  79.58937198067633%
Training Phase - Epoch #  3 , Loss :  0.572899791929457  , Accuracy Value is :  57.57850241545894%
Training Phase - Epoch #  4 , Loss :  0.6044369273715549  , Accuracy Value is :  82.01992753623189%
Training Phase - Epoch #  5 , Loss :  0.5367798573440976  , Accuracy Value is :  81.67270531400966%
Training Phase - Epoch #  7 , Loss :  0.44552314281463623  , Accuracy Value is :  82.71437198067633%
Training Phase - Epoch #  8 , Loss :  0.4366236974795659  , Accuracy Value is :  82.71437198067633%
Training Phase - Epoch #  9 , Loss :  0.43553534812397426  , Accuracy Value is :  82.71437198067633%
Training Phase - Epoch #  10 , Loss :  0.44049228231112164  , Accuracy Value is :  82.3671497584541%
Training Phase - Epoch #  11 , Loss :  0.4208071033159892  , Accuracy Value is :  83.40881642512078%
Training Phase - Epoch #  12 , Loss :  0.4205107589562734  , Accuracy Value is :  81.67270531400966%
Training Phase - Epoch #  13 , Loss :  0.4215390847788917  , Accuracy Value is :  83.40881642512078%
Training Phase - Epoch #  14 , Loss :  0.4123901708258523  , Accuracy Value is :  82.3671497584541%
Training Phase - Epoch #  15 , Loss :  0.40886599653297  , Accuracy Value is :  83.40881642512078%
Training Phase - Epoch #  16 , Loss :  0.40121200680732727  , Accuracy Value is :  84.10326086956522%
Training Phase - Epoch #  17 , Loss :  0.39108316269185806  , Accuracy Value is :  84.10326086956522%
Training Phase - Epoch #  18 , Loss :  0.3933882961670558  , Accuracy Value is :  84.45048309178745%
Training Phase - Epoch #  19 , Loss :  0.38997334738572437  , Accuracy Value is :  83.54468599033817%
Training Phase - Epoch #  20 , Loss :  0.39450888997978634  , Accuracy Value is :  83.54468599033817%
Training Phase - Epoch #  21 , Loss :  0.39227795600891113  , Accuracy Value is :  83.19746376811594%
Training Phase - Epoch #  22 , Loss :  0.39049198230107623  , Accuracy Value is :  83.756038647343%
Training Phase - Epoch #  23 , Loss :  0.35965031882127124  , Accuracy Value is :  88.54166666666667%
Training Phase - Epoch #  24 , Loss :  0.36477796650595135  , Accuracy Value is :  87.71135265700484%
Training Phase - Epoch #  25 , Loss :  0.33614182968934375  , Accuracy Value is :  88.7530193236715%
Training Phase - Epoch #  26 , Loss :  0.3366866161425908  , Accuracy Value is :  89.58333333333333%
Training Phase - Epoch #  27 , Loss :  0.3477441966533661  , Accuracy Value is :  87.15277777777777%
Training Phase - Epoch #  28 , Loss :  0.3386049485868878  , Accuracy Value is :  89.44746376811594%
Training Phase - Epoch #  31 , Loss :  0.29728467100196415  , Accuracy Value is :  89.65881642512078%
Training Phase - Epoch #  32 , Loss :  0.2762363735172484  , Accuracy Value is :  88.6171497584541%
Training Phase - Epoch #  33 , Loss :  0.2542361105173212  , Accuracy Value is :  88.6191235690112%
Training Phase - Epoch #  34 , Loss :  0.31613292954934215  , Accuracy Value is :  88.7490193189715%
Training Phase - Epoch #  35 , Loss :  0.3366865221425908  , Accuracy Value is :  89.58333333333333%
Training Phase - Epoch #  36 , Loss :  0.29706467100162402 , Accuracy Value is :  89.59021156784321%
Training Phase - Epoch #  37 , Loss :  0.28146235600256411 , Accuracy Value is :  89.59137836281319%
Training Phase - Epoch #  38 , Loss :  0.28012345210252215 , Accuracy Value is :  89.59172821292521%
Training Phase - Epoch #  39 , Loss :  0.27113285000226312 , Accuracy Value is :  89.59187325142561%
Training Phase - Epoch #  40 , Loss :  0.27102235400166413 , Accuracy Value is :  89.59201236370252%
The Model Accuracy Is : 90.00213443567643 %